This document contains analysis of differences in microbial alpha diversity between healthy samples from American Gut Project and samples with diagnosed Inflammatory bowel syndrome (Ulcerative colitis and Crohn’s disease) and Clostridium difficile infection, as well as samples after fecal microbiota transplantation.
The aim of this analysis is to show that there are significant differences between microbiome alpha diversity in healthy and disease samples. Furthermore, we want to show that fecal transplantation improves alpha diversity in short- and long-term.
Loading libraries:
#install.packages("readr")
library("readr")
#install.packages('data.table')
library(data.table)
library(stringr)
#install.packages('ggplot2')
library(ggplot2)
library(gridExtra)
library(grid)
library(plyr)
library(purrr)
library(dplyr)
#install.packages('flextable')
library(flextable)
library(tibble)
library(ComplexHeatmap)
library(RColorBrewer)
library(ggplotify)
library(here)
Load healthy subsample of AGP data:
# Load all AGP data
AGP <- read.csv(here("01_tidy_data", "AGP_all.csv.gz"), header = TRUE, sep = ",")
# Load healthy samples' table
all_healthy <- read.csv(here("01_tidy_data", "AGP_healthy.csv.gz"), header = TRUE, sep = ",")
all_healthy <- select(all_healthy, sample_id, shannon_entropy, chao1, menhinick, margalef, fisher_alpha, simpson, pielou_evenness, gini_index, strong, simpson, faith_pd, sex, race, host_age)
all_healthy$condition <- 'healthy'
all_healthy$qiita_study_id <- AGP$qiita_study_id[match(all_healthy$sample_id, AGP$sample_id)]
names(all_healthy)[names(all_healthy) == 'host_age'] <- 'age'
Load Inflammatory Bowel Disease data:
all_IBD <- read.csv(here("01_tidy_data", "IBD_all.csv.gz"), header = TRUE, sep = ",")
Load Longitudinal Chron’s disease study data:
CD_2 <- read.csv(here("01_tidy_data", "all_CD_2.csv.gz"), header = TRUE, sep = ",")
Load Changes following fecal microbial transplantation for recurrent CDI data:
C_diff_trans <- read.csv(here("01_tidy_data", "all_C_diff_trans.csv.gz"), header = TRUE, sep = ",")
Load Fecal transplant - CDI with underlying IBD data:
trans_IBD_CDI <- read.csv(here("01_tidy_data", "all_trans_IBD_CDI.csv.gz"), header = TRUE, sep = ",")
Let’s define vector of names of the alpha diversity metrics that are going to be analysed:
metric <- c("shannon_entropy", "chao1", "menhinick", "margalef", "fisher_alpha", "simpson", "gini_index", "strong", "pielou_evenness", "faith_pd" )
#merge two datasets
healthy_disease <- rbind.fill(all_healthy, all_IBD)
healthy_disease$condition <- as.factor(healthy_disease$condition)
healthy_disease$condition <- relevel(healthy_disease$condition, "healthy")
table(healthy_disease$condition)
##
## healthy CD UC
## 600 24 40
nrow(healthy_disease)
## [1] 664
Generate a vector of 10 random colors for histograms:
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
colors <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
histo_IBD <- vector('list', length(metric))
for (i in 1:length(metric)){
histo_IBD[[i]] <- all_IBD %>% ggplot(aes(x = .data[[metric[i]]])) +
geom_histogram(aes(y=..density..), colour="black", fill="white", bins=30)+
geom_density(alpha=.2, fill=colors[i]) +
xlab(label = metric[i]) +
ylab(label = "density")
}
grid.arrange(histo_IBD[[1]], histo_IBD[[2]],histo_IBD[[3]], histo_IBD[[4]],histo_IBD[[5]], histo_IBD[[6]],histo_IBD[[7]], histo_IBD[[8]],histo_IBD[[9]], histo_IBD[[10]], ncol=3, top = textGrob("Distributions of 1) Shannon's index 2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2)))
histo_healthy <- vector('list', length(metric))
for (i in 1:length(metric)){
histo_healthy[[i]] <- all_healthy %>% ggplot(aes(x = .data[[metric[i]]])) +
geom_histogram(aes(y=..density..), colour="black", fill="white", bins=30)+
geom_density(alpha=.2, fill=colors[i]) +
xlab(label = metric[i]) +
ylab(label = "density")
}
grid.arrange(histo_healthy[[1]], histo_healthy[[2]], histo_healthy[[3]], histo_healthy[[4]], histo_healthy[[5]], histo_healthy[[6]], histo_healthy[[7]], histo_healthy[[8]], histo_healthy[[9]], histo_IBD[[10]], ncol=3, top = textGrob("Distributions of 1) Shannon's index 2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in healthy dataset", gp=gpar(fontsize=10,font=2)))
Density plots and Box plots
density <- vector('list', length(metric))
box <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- healthy_disease %>% dplyr::group_by(condition) %>% dplyr::summarise(grp_mean=mean(.data[[metric[i]]]))
density[[i]] <- healthy_disease %>% ggplot(aes(x = .data[[metric[i]]], color = condition)) +
geom_density()+
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
labs(x = metric[i])
box[[i]] <- healthy_disease %>% ggplot(aes(x = .data[[metric[i]]], color = condition)) +
geom_boxplot() +
labs(x = metric[i])
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
density [[i]] <- density [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
box [[i]] <- box [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
# Show plots
for (j in 1:length(metric)){
grid.arrange(density[[j]], box[[j]], ncol=2, top = paste("Density and box plot comparing healthy vs diseases data for metric:", metric[j], sep=" "))
}
Explore differences in distribution shape and mean values of groups with different conditions by ploting violin plot:
violin <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- healthy_disease %>% dplyr::group_by(condition) %>% dplyr::summarise(grp_mean=mean(.data[[metric[i]]]))
plot_data <- healthy_disease %>% dplyr::group_by(condition) %>% dplyr::mutate(m = mean(.data[[metric[i]]]))
violin[[i]] <- plot_data %>% ggplot(aes(x = .data[[metric[i]]], y = reorder(condition, -m), color = condition, fill = condition)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
labs(x = metric[i])+
ylab("")+
#ggtitle("A Violin plot of distributions of alpha metrics in different conditions")+
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin [[i]] <- violin [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
# Show plots
#violin
grid.arrange(violin[[1]], violin[[2]],violin[[3]], violin[[4]],violin[[5]], violin[[6]],violin[[7]], violin[[8]],violin[[9]], violin[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index 2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2)))
Test whether different conditions separate into distinct distributions with Mann-Whitney-Wilcoxon test:
test_helathy_IBD <- list()
for (i in 1:length(metric)){
test_helathy_IBD[[i]] <- pairwise.wilcox.test(healthy_disease[[metric[i]]], healthy_disease$condition, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
test_helathy_IBD[[i]]$p.value <- round(test_helathy_IBD[[i]]$p.value, digits = 17)
}
tests <- do.call(what = rbind, args = test_helathy_IBD)
table1 <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different conditions")
table2 <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
arrange(p.value) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different conditions")
Orered by parameter:
table1
Results of the Wilcox test for distributions of different conditions | ||||
parameter | group1 | group2 | p.value | p.adjusted |
shannon_entropy | CD | healthy | 0.39124311325787869 | 0.510317104249407016 |
shannon_entropy | UC | healthy | 0.01296003473847978 | 0.022870649538493732 |
shannon_entropy | UC | CD | 0.31721308809088172 | 0.451111941685758855 |
chao1 | CD | healthy | 0.82814304463167232 | 0.881446671454585329 |
chao1 | UC | healthy | 0.00000038693590132 | 0.000001658296719943 |
chao1 | UC | CD | 0.00122770386886685 | 0.003683111606600550 |
menhinick | CD | healthy | 0.00002719515391959 | 0.000090650513065300 |
menhinick | UC | healthy | 0.00000000000001863 | 0.000000000000139725 |
menhinick | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
margalef | CD | healthy | 0.65204153563271072 | 0.724490595147456395 |
margalef | UC | healthy | 0.00000233349016210 | 0.000008750588107875 |
margalef | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
fisher_alpha | CD | healthy | 0.14776457972183557 | 0.233312494297635120 |
fisher_alpha | UC | healthy | 0.00000002482763666 | 0.000000124138183300 |
fisher_alpha | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
simpson | CD | healthy | 0.85206511573943244 | 0.881446671454585329 |
simpson | UC | healthy | 0.28343319895771257 | 0.425149798436568860 |
simpson | UC | CD | 0.45263274746491944 | 0.565790934331149353 |
gini_index | CD | healthy | 0.00000000000000009 | 0.000000000000001350 |
gini_index | UC | healthy | 0.00000000000000000 | 0.000000000000000000 |
gini_index | UC | CD | 0.09214651379693838 | 0.153577522994897298 |
strong | CD | healthy | 0.00143769914583817 | 0.003920997670467736 |
strong | UC | healthy | 0.00180923525465334 | 0.004523088136633350 |
strong | UC | CD | 0.33081542390288987 | 0.451111941685758855 |
pielou_evenness | CD | healthy | 0.57113507023141241 | 0.659002004113168116 |
pielou_evenness | UC | healthy | 0.56860881304145416 | 0.659002004113168116 |
pielou_evenness | UC | CD | 0.98353217845023189 | 0.983532178450231886 |
faith_pd | CD | healthy | 0.00000000000000076 | 0.000000000000007600 |
faith_pd | UC | healthy | 0.00390376038973288 | 0.007319550730749151 |
faith_pd | UC | CD | 0.00000000072599039 | 0.000000004355942340 |
Ordered by signifficance:
table2
Results of the Wilcox test for distributions of different conditions | ||||
parameter | group1 | group2 | p.value | p.adjusted |
gini_index | UC | healthy | 0.00000000000000000 | 0.000000000000000000 |
gini_index | CD | healthy | 0.00000000000000009 | 0.000000000000001350 |
faith_pd | CD | healthy | 0.00000000000000076 | 0.000000000000007600 |
menhinick | UC | healthy | 0.00000000000001863 | 0.000000000000139725 |
faith_pd | UC | CD | 0.00000000072599039 | 0.000000004355942340 |
fisher_alpha | UC | healthy | 0.00000002482763666 | 0.000000124138183300 |
chao1 | UC | healthy | 0.00000038693590132 | 0.000001658296719943 |
margalef | UC | healthy | 0.00000233349016210 | 0.000008750588107875 |
menhinick | CD | healthy | 0.00002719515391959 | 0.000090650513065300 |
chao1 | UC | CD | 0.00122770386886685 | 0.003683111606600550 |
strong | CD | healthy | 0.00143769914583817 | 0.003920997670467736 |
strong | UC | healthy | 0.00180923525465334 | 0.004523088136633350 |
menhinick | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
margalef | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
fisher_alpha | UC | CD | 0.00292869423612998 | 0.005857388472259960 |
faith_pd | UC | healthy | 0.00390376038973288 | 0.007319550730749151 |
shannon_entropy | UC | healthy | 0.01296003473847978 | 0.022870649538493732 |
gini_index | UC | CD | 0.09214651379693838 | 0.153577522994897298 |
fisher_alpha | CD | healthy | 0.14776457972183557 | 0.233312494297635120 |
simpson | UC | healthy | 0.28343319895771257 | 0.425149798436568860 |
shannon_entropy | UC | CD | 0.31721308809088172 | 0.451111941685758855 |
strong | UC | CD | 0.33081542390288987 | 0.451111941685758855 |
shannon_entropy | CD | healthy | 0.39124311325787869 | 0.510317104249407016 |
simpson | UC | CD | 0.45263274746491944 | 0.565790934331149353 |
pielou_evenness | UC | healthy | 0.56860881304145416 | 0.659002004113168116 |
pielou_evenness | CD | healthy | 0.57113507023141241 | 0.659002004113168116 |
margalef | CD | healthy | 0.65204153563271072 | 0.724490595147456395 |
chao1 | CD | healthy | 0.82814304463167232 | 0.881446671454585329 |
simpson | CD | healthy | 0.85206511573943244 | 0.881446671454585329 |
pielou_evenness | UC | CD | 0.98353217845023189 | 0.983532178450231886 |
An FDR-adjusted p-value (also called q-value) of 0.05 indicates that 5% of significant tests will result in false positives. In other words, an FDR of 5% means that, among all results called significant, only 5% of these are truly null.
Now lets see which parameters show significant differenc between healthy and unhealthy (all conditions except healthy) samples:
wilcox_healthy_disease <- healthy_disease %>%
summarise(Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value)
wilcox_healthy_disease <- t(wilcox_healthy_disease)
colnames(wilcox_healthy_disease) <- c("p.value")
wilcox_healthy_disease <- data.frame(Alpha_Metric = row.names(wilcox_healthy_disease), wilcox_healthy_disease)
wilcox_healthy_disease$p.value <- round(wilcox_healthy_disease$p.value, digits = 17)
wilcox_healthy_disease %>%
add_column(p.adjusted = p.adjust(wilcox_healthy_disease$p.value, "fdr"), .after='p.value') %>%
flextable() %>%
bold(~ p.value < 0.05, 2) %>%
bold(~ p.adjusted < 0.05, 3) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different parameters between healthy and unhealthy samples")
Results of the Wilcox test for distributions of different parameters between healthy and unhealthy samples | ||
Alpha_Metric | p.value | p.adjusted |
Shannon | 0.01476801717769463 | 0.016614019324906464 |
Chao1 | 0.00004731531506923 | 0.000085167567124614 |
Menhinick | 0.00000000000000003 | 0.000000000000000135 |
Margalef | 0.00008380741351682 | 0.000125711120275230 |
Pielou | 0.43593092891630170 | 0.435930928916301696 |
Fisher | 0.00000021440198670 | 0.000000643205960100 |
Gini | 0.00000000000000000 | 0.000000000000000000 |
Strong | 0.00001603861445901 | 0.000036086882532773 |
Faith | 0.01092908258305631 | 0.014051677606786685 |
There is significant difference between healthy samples and those that are not healthy in all alpha metrics except from Pielou’s evenness.
Kruskal-Wallis Testis a non-parametric method for testing whether samples originate from the same distribution. It is used for comparing two or more independent samples of equal or different sample sizes. It extends the Mann–Whitney U test, which is used for comparing only two groups (source: Wikipedia)
kruskal_results <- healthy_disease %>%
summarise(Shannon = kruskal.test(healthy_disease$shannon_entropy ~ healthy_disease$condition)$p.value,
Chao1 = kruskal.test(healthy_disease$chao1 ~ healthy_disease$condition)$p.value,
Fisher = kruskal.test(healthy_disease$fisher_alpha ~ healthy_disease$condition)$p.value,
Margalef =kruskal.test(healthy_disease$margalef ~ healthy_disease$condition)$p.value,
Simpson = kruskal.test(healthy_disease$simpson ~ healthy_disease$condition)$p.value,
Menhinick = kruskal.test(healthy_disease$menhinick ~ healthy_disease$condition)$p.value,
Pielou = kruskal.test(healthy_disease$pielou_evenness ~ healthy_disease$condition)$p.value,
Gini = kruskal.test(healthy_disease$gini_index ~ healthy_disease$condition)$p.value,
Strong = kruskal.test(healthy_disease$strong ~ healthy_disease$condition)$p.value,
Faith = kruskal.test(healthy_disease$faith_pd ~ healthy_disease$condition)$p.value)
kruskal_results <- as.data.frame(t(kruskal_results))
colnames(kruskal_results) <- c("p.value")
kruskal_results <- data.frame(Alpha_Metric = row.names(kruskal_results), kruskal_results)
kruskal_results$p.value <- round(kruskal_results$p.value, digits = 17)
kruskal_results %>%
add_column(p.adjusted = p.adjust(kruskal_results$p.value, "fdr"), .after='p.value') %>%
flextable() %>%
bold(~ p.value < 0.05, 2) %>%
bold(~ p.adjusted < 0.05, 3) %>%
add_header_lines(values = "Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions")
Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions | ||
Alpha_Metric | p.value | p.adjusted |
Shannon | 0.0343191764454013 | 0.0428989705567517 |
Chao1 | 0.0000024229499877 | 0.0000048458999754 |
Fisher | 0.0000000769073585 | 0.0000001922683963 |
Margalef | 0.0000127121266558 | 0.0000211868777596 |
Simpson | 0.5502770385790263 | 0.6114189317544737 |
Menhinick | 0.0000000000000001 | 0.0000000000000005 |
Pielou | 0.7350820060370993 | 0.7350820060370993 |
Gini | 0.0000000000000000 | 0.0000000000000000 |
Strong | 0.0000742732693699 | 0.0001061046705284 |
Faith | 0.0000000000000003 | 0.0000000000000010 |
This comparison shows us that there IS a difference between alpha diversity of healthy and IBD samples. The downside of this analysis is the small sample size of IBD dataset (UC and CD).
Since previous analysis consisted from small sample size for IBD dataset and Chron’s disease samples showed unexpectedly high alpha diversity in various indexes, lets incorporate another study, this time only with CD patients. The sample size of this data set is 688. The number of patients and controls is balanced and controls are close relatives of patients. each patietn was sampled multiple times during the period of 6 weeks. Some patients previously underwent a surgical intervention on some part of their dygestive system.
table(CD_2$description, CD_2$condition)
##
## control crohns
## father family 01-00010 27 0
## father family 01-00011 20 0
## father family 01-00012 54 0
## father family 01-00013 27 0
## fecal sample index pt 20 0 20
## fecal sample index pt 21 0 25
## fecal sample index pt 23 0 21
## fecal sample index pt 24 0 26
## fecal sample index pt 25 0 14
## fecal sample index pt 26 0 16
## fecal sample index pt 27 0 14
## fecal sample index pt 31 0 10
## fecal sample index pt 32 0 22
## fecal sample index pt 33 0 20
## fecal sample index pt 34 0 2
## fecal sample index pt 35 0 14
## fecal sample index pt 36 0 9
## fecal sample index pt 37 0 22
## fecal sample index pt 39 0 26
## index family 01-00012 0 48
## index family 01-00013 0 26
## mother family 01-00010 25 0
## mother family 01-00011 23 0
## mother family 01-00012 57 0
## mother family 01-00013 27 0
## sibling family 01-00010 18 0
## sibling family 01-00012 55 0
## sibling family 01-00013 20 0
table(CD_2$condition)
##
## control crohns
## 353 335
table(CD_2$surgery_and_ibd)
##
## control crohns crohns (surgery)
## 353 173 162
Lets do Violin plot to see the difference in alpha diversity between cases and controls in this study:
violin_CD_2a <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- CD_2 %>% dplyr::group_by(condition) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))
violin_CD_2a [[i]] <- CD_2 %>% ggplot(aes(x = .data[[metric[i]]], y = condition, color = condition, fill = condition)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
labs(x = metric[i])+
ylab("") +
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_CD_2a [[i]] <- violin_CD_2a [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_CD_2a
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
grid.arrange(violin_CD_2a[[1]], violin_CD_2a[[2]],violin_CD_2a[[3]], violin_CD_2a[[4]],violin_CD_2a[[5]], violin_CD_2a[[6]],violin_CD_2a[[7]], violin_CD_2a[[8]],violin_CD_2a[[9]],violin_CD_2a[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index 2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2)))
Lets now compare alpha diversity between cases and controls but taking into account if they underwent surgery:
violin_CD_2b <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- CD_2 %>% dplyr::group_by(surgery_and_ibd) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))
violin_CD_2b [[i]] <- CD_2 %>% ggplot(aes(x = .data[[metric[i]]], y = surgery_and_ibd, color = surgery_and_ibd, fill = surgery_and_ibd)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = surgery_and_ibd), linetype = "dashed")+
labs(x = metric[i])+
ylab("") +
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_CD_2b [[i]] <- violin_CD_2b [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_CD_2b
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Lets plot differences in effect of different surgical interventions:
violin_CD_2_surg <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- CD_2 %>% dplyr::group_by(surgery_type) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))
plot_data <- CD_2 %>% dplyr::group_by(surgery_type) %>% dplyr::mutate(m = mean(.data[[metric[i]]]))
violin_CD_2_surg [[i]] <- plot_data %>% ggplot(aes(x = .data[[metric[i]]], y = reorder(surgery_type, -m), color = surgery_type, fill = surgery_type)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = surgery_type), linetype = "dashed")+
labs(x = metric[i])+
ylab("") +
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_CD_2_surg [[i]] <- violin_CD_2_surg [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_CD_2_surg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Finally, lets compare cases and control from this study with cases from previous IBD studies and AGP controls:
CD_1 <- healthy_disease[healthy_disease$condition != "UC",]
CD_2_surg <- CD_2
CD_2_surg$condition <- NULL
names(CD_2_surg)[names(CD_2_surg) == 'surgery_and_ibd'] <- 'condition'
#CD_check <- rbind.fill(CD_1, CD_2)
CD_check <- rbind.fill(CD_1, CD_2_surg)
CD_check$condition[CD_check$condition == "CD"] <-'CD_1'
CD_check$condition[CD_check$condition == "crohns"] <-'CD_2'
CD_check$condition[CD_check$condition == "crohns (surgery)"] <-'CD_2_surgery'
CD_check$condition[CD_check$condition == "healthy"] <-'control(AGP)'
CD_check$condition[CD_check$condition == "control"] <-'control_2'
violin_CD_check <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- CD_check %>% dplyr::group_by(condition) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))
violin_CD_check [[i]] <- CD_check %>% ggplot(aes(x = .data[[metric[i]]], y = condition, color = condition, fill = condition)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
scale_y_discrete(limits=c("CD_2", "control_2", "CD_2_surgery", "CD_1", "control(AGP)")) +
labs(x = metric[i])+
ylab("") +
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_CD_check [[i]] <- violin_CD_check [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_CD_check
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Here we can see that alpha diversity of CD samples from previous data set have similar mean values as CD_2 samples for all alpha indexes except from Faith’s PD which is much higher and Gini index which is much lower than in this longitudinal study. On the other side, CD samples with past surgery had much lower mean in all measures.
This probably mean that high diversity of CD samples is not a mistake, those are probably just samples with better clinical status.
When it comes to samples with previous surgery, it is unclear whether the lower alpha diversity comes from the severity of CD symptoms or as a direct consequence of surgery (no info about the time that passed after surgery).
test_CD_1 <- list()
test_CD_2 <- list()
for (i in 1:length(metric)){
test_CD_1[[i]] <- pairwise.wilcox.test(CD_1[[metric[i]]], CD_1$condition, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
test_CD_1[[i]]$p.value <- round(test_CD_1[[i]]$p.value, digits = 17)
test_CD_2[[i]] <- pairwise.wilcox.test(CD_2[[metric[i]]], CD_2$surgery_and_ibd, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
test_CD_2[[i]]$p.value <- round(test_CD_2[[i]]$p.value, digits = 17)
}
tests1 <- do.call(what = rbind, args = test_CD_1)
tests2 <- do.call(what = rbind, args = test_CD_2)
table_CD_1 <- tests1 %>%
add_column(p.adjusted = p.adjust(tests1$p.value, "fdr"), .after='p.value') %>%
arrange(p.value, parameter) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different conditions in CD_1 dataset")
table_CD_2 <- tests2 %>%
add_column(p.adjusted = p.adjust(tests2$p.value, "fdr"), .after='p.value') %>%
arrange(p.value, parameter) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcoxon test for distributions of different conditions in CD_2 dataset")
Results of the Wilcoxon test for diversity distributions of healthy and CD samples in first study:
table_CD_1
Results of the Wilcox test for distributions of different conditions in CD_1 dataset | ||||
parameter | group1 | group2 | p.value | p.adjusted |
gini_index | CD | healthy | 0.00000000000000009 | 0.0000000000000009 |
faith_pd | CD | healthy | 0.00000000000000076 | 0.0000000000000038 |
menhinick | CD | healthy | 0.00002719515391959 | 0.0000906505130653 |
strong | CD | healthy | 0.00143769914583817 | 0.0035942478645954 |
fisher_alpha | CD | healthy | 0.14776457972183557 | 0.2955291594436711 |
shannon_entropy | CD | healthy | 0.39124311325787869 | 0.6520718554297978 |
pielou_evenness | CD | healthy | 0.57113507023141241 | 0.8150519195408884 |
margalef | CD | healthy | 0.65204153563271072 | 0.8150519195408884 |
chao1 | CD | healthy | 0.82814304463167232 | 0.8520651157394324 |
simpson | CD | healthy | 0.85206511573943244 | 0.8520651157394324 |
This table shows that, in first study, only four indexes show signifficant difference between healthy and CD samples (Gini, Faith, Menhinick, Strong).
Results of the Wilcoxon test for diversity distributions of healthy and CD samples in second (longitudinal) study:
table_CD_2
Results of the Wilcoxon test for distributions of different conditions in CD_2 dataset | ||||
parameter | group1 | group2 | p.value | p.adjusted |
faith_pd | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
fisher_alpha | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
gini_index | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
margalef | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
menhinick | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
pielou_evenness | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
shannon_entropy | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
simpson | crohns (surgery) | control | 0.00000000000000000 | 0.00000000000000000000000 |
chao1 | crohns (surgery) | control | 0.00000000000000001 | 0.00000000000000003333333 |
strong | crohns (surgery) | control | 0.00000000000070725 | 0.00000000000212175000000 |
faith_pd | crohns (surgery) | crohns | 0.00000000000412302 | 0.00000000001124460000000 |
gini_index | crohns (surgery) | crohns | 0.00000000019662471 | 0.00000000049156177500000 |
fisher_alpha | crohns (surgery) | crohns | 0.00000000051815197 | 0.00000000103630394000000 |
margalef | crohns (surgery) | crohns | 0.00000000051815197 | 0.00000000103630394000000 |
menhinick | crohns (surgery) | crohns | 0.00000000051815197 | 0.00000000103630394000000 |
shannon_entropy | crohns (surgery) | crohns | 0.00000000272429143 | 0.00000000510804643125000 |
chao1 | crohns (surgery) | crohns | 0.00000001017126923 | 0.00000001794929864117647 |
pielou_evenness | crohns (surgery) | crohns | 0.00000536780985689 | 0.00000894634976148333456 |
simpson | crohns (surgery) | crohns | 0.00000600629169141 | 0.00000948361846012105263 |
simpson | crohns | control | 0.00002013513551602 | 0.00003020270327402999801 |
pielou_evenness | crohns | control | 0.00003537721365162 | 0.00005053887664517142908 |
strong | crohns | control | 0.00026284991838079 | 0.00035843170688289540966 |
strong | crohns (surgery) | crohns | 0.00037910329096575 | 0.00049448255343358701701 |
shannon_entropy | crohns | control | 0.00077522271035731 | 0.00096902838794663752157 |
gini_index | crohns | control | 0.01750412729110927 | 0.02100495274933112180293 |
chao1 | crohns | control | 0.06579526636831212 | 0.07591761504036013963326 |
faith_pd | crohns | control | 0.10341083464326625 | 0.11490092738140694761384 |
fisher_alpha | crohns | control | 0.21387405361949660 | 0.21387405361949660131948 |
margalef | crohns | control | 0.21387405361949660 | 0.21387405361949660131948 |
menhinick | crohns | control | 0.21387405361949660 | 0.21387405361949660131948 |
All alpha indexes show significant difference between healthy and crohn’s (surgery) samples in longitudinal Crohn’s study. Helathy and CD samples without surgery are different in five indexes (Strong, Simpson, Pielou, Shannon, Gini).
CD_check_w <- CD_check %>% filter(CD_check$condition != "control(AGP)" & CD_check$condition != "control_2" )
test_CD_3 <- list()
for (i in 1:length(metric)){
test_CD_3[[i]] <- pairwise.wilcox.test(CD_check_w[[metric[i]]], CD_check_w$condition, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
test_CD_3[[i]]$p.value <- round(test_CD_3[[i]]$p.value, digits = 17)
}
tests <- do.call(what = rbind, args = test_CD_3)
table_CD_3 <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
arrange(p.value, parameter) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different groups of Crhohn's disease patients")
table_CD_3
Results of the Wilcox test for distributions of different groups of Crhohn's disease patients | ||||
parameter | group1 | group2 | p.value | p.adjusted |
faith_pd | CD_2_surgery | CD_1 | 0.00000000000000318 | 0.000000000000073700 |
faith_pd | CD_2 | CD_1 | 0.00000000000000700 | 0.000000000000073700 |
gini_index | CD_2_surgery | CD_1 | 0.00000000000000737 | 0.000000000000073700 |
gini_index | CD_2 | CD_1 | 0.00000000000002937 | 0.000000000000220275 |
faith_pd | CD_2_surgery | CD_2 | 0.00000000000412302 | 0.000000000024738120 |
gini_index | CD_2_surgery | CD_2 | 0.00000000019662471 | 0.000000000983123550 |
fisher_alpha | CD_2_surgery | CD_2 | 0.00000000051815197 | 0.000000001727173233 |
margalef | CD_2_surgery | CD_2 | 0.00000000051815197 | 0.000000001727173233 |
menhinick | CD_2_surgery | CD_2 | 0.00000000051815197 | 0.000000001727173233 |
shannon_entropy | CD_2_surgery | CD_2 | 0.00000000272429143 | 0.000000008172874290 |
chao1 | CD_2_surgery | CD_2 | 0.00000001017126923 | 0.000000027739825173 |
pielou_evenness | CD_2_surgery | CD_2 | 0.00000536780985689 | 0.000013419524642225 |
simpson | CD_2_surgery | CD_2 | 0.00000600629169141 | 0.000013860673134023 |
shannon_entropy | CD_2_surgery | CD_1 | 0.00001835318609066 | 0.000039328255908557 |
simpson | CD_2_surgery | CD_1 | 0.00004727241478514 | 0.000094544829570280 |
margalef | CD_2_surgery | CD_1 | 0.00010355431737318 | 0.000194164345074712 |
fisher_alpha | CD_2_surgery | CD_1 | 0.00013726797722562 | 0.000242237606868741 |
chao1 | CD_2_surgery | CD_1 | 0.00024198165016724 | 0.000403302750278733 |
strong | CD_2_surgery | CD_2 | 0.00037910329096575 | 0.000598584143630132 |
menhinick | CD_2_surgery | CD_1 | 0.00060108041699576 | 0.000901620625493640 |
pielou_evenness | CD_2_surgery | CD_1 | 0.00090946322043473 | 0.001299233172049614 |
strong | CD_2_surgery | CD_1 | 0.06721818839753790 | 0.091661165996642591 |
simpson | CD_2 | CD_1 | 0.08731955396999834 | 0.113895070395650014 |
shannon_entropy | CD_2 | CD_1 | 0.28215387991398205 | 0.352692349892477552 |
pielou_evenness | CD_2 | CD_1 | 0.31590796277536898 | 0.379089555330442751 |
margalef | CD_2 | CD_1 | 0.47848035469736533 | 0.538275341934925544 |
chao1 | CD_2 | CD_1 | 0.48444780774143292 | 0.538275341934925544 |
fisher_alpha | CD_2 | CD_1 | 0.53972418968600622 | 0.578275917520720939 |
menhinick | CD_2 | CD_1 | 0.84999324337675142 | 0.879303355217329163 |
strong | CD_2 | CD_1 | 0.98933097188722530 | 0.989330971887225297 |
This table confirms that CD samples (without surgery) from two studies only differ in Faith’s PD and Gini index.
With this data set of rnrow(C_diff_trans)` samples we
will explore whether there FMT for recurrent CDI affects the improvement
of alpha diversity. Samples from 4 patients were collected in multiple
time points after transplantation
table(C_diff_trans$disease_state)
##
## post-FMT Pre-FMT
## 106 5
table(C_diff_trans$day_since_fmt)
##
## -18 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 35 36 40
## 1 1 1 2 3 3 2 2 2 3 3 4 4 3 4 4 3 3 3 3 4 4 2 4 3 3 2 3 1 4 2 1 1 1 1 1 1 1 1
## 42 47 49 55 56 63 70 77 84 151
## 2 1 2 1 2 2 2 2 2 1
table(C_diff_trans$animations_subject)
##
## CD1 CD2 CD3 CD4
## 36 23 16 36
C_diff_trans$animations_subject[C_diff_trans$animations_subject == "CD1"] <-'subject_1'
C_diff_trans$animations_subject[C_diff_trans$animations_subject == "CD2"] <-'subject_2'
C_diff_trans$animations_subject[C_diff_trans$animations_subject == "CD3"] <-'subject_3'
C_diff_trans$animations_subject[C_diff_trans$animations_subject == "CD4"] <-'subject_4'
progression <- vector('list', length(metric))
for (i in 1:length(metric)){
progression[[i]] <- C_diff_trans %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]], group=animations_subject)) +
geom_line(aes(color=animations_subject))+
geom_point(aes(color=animations_subject))+
facet_wrap(vars(animations_subject), scale="free", ncol=2)
}
progression
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Even though the value is fluctuating as the time passes, we can see general trend of improvement of alpha diversity after FMT in all subjects.
From the paper: “A recent study suggests significantly lower response of CDI to FMT in patients with underlying inflammatory bowel disease (IBD)”.
The original study is looking at the changes in community composition (taxonomical) in patients after FMT. Lets examine what happens with alpha diversity as the time is passing after transplantation:
table(trans_IBD_CDI$condition)
##
## CDI CDI + CD CDI + UC Donors
## 28 6 6 28
table(trans_IBD_CDI$day_since_fmt)
##
## -1 28 7 NA-Donor
## 14 11 14 29
trans_IBD_CDI_1 <- trans_IBD_CDI %>%
filter(day_since_fmt != "no_data" ) %>%
filter(!(donor_or_patient == "Donor" & condition=="CDI"))
trans_IBD_CDI_1$day_since_fmt <- as.factor(trans_IBD_CDI_1$day_since_fmt)
violin_trans <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- trans_IBD_CDI_1 %>% dplyr::group_by(condition, day_since_fmt) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))
violin_trans[[i]] <- trans_IBD_CDI_1 %>%
mutate(across(day_since_fmt, factor, levels=c("-1","7","28","NA-Donor"))) %>%
ggplot(aes(x = .data[[metric[i]]], y = condition, color = condition, fill = condition)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
labs(x = metric[i])+
ylab("") +
facet_wrap(vars(day_since_fmt), nrow=1)+
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_trans [[i]] <- violin_trans [[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_trans
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
These plots show consistent trend of improvement of alpha diversity in 7th and 28th day after fecal microbiota transplantation. For all alpha indexes we can see that the diversity is increasing toward donor’s mean value. However, samples with underlying CD show a trend of diversity decrease in 28th day compared to 7th day. This shows that IBD can decrease the efficiency of FMT in CDI recepients.
cond <- c("CDI + UC", "CDI", "CDI + CD")
test_CDI_trans <- list()
table <- list()
for (i in 1:length(cond)){
trans_IBD_CDI_2 <- trans_IBD_CDI_1 %>%
filter(condition == cond[i])
for (j in 1:length(metric)){
test_CDI_trans[[j]] <- pairwise.wilcox.test(trans_IBD_CDI_2[[metric[j]]], trans_IBD_CDI_2$day_since_fmt, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[j], .before='group1')
test_CDI_trans[[j]]$p.value <- round(test_CDI_trans[[j]]$p.value, digits = 17)
}
tests <- do.call(what = rbind, args = test_CDI_trans)
table <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = paste("Results of the Wilcox test for condition:", cond[i], sep = " "))
print(table)
test_CDI_trans <- list()
}
Now, lets merge all data sets with IBD and AGP as control:
before_trans <- trans_IBD_CDI %>%
filter(day_since_fmt != c("7","28", "no_data", "NA-Donor"),
condition != "Donors")
CD_2_merge <- CD_2 %>%
filter(condition != "not applicable")
CD_2_merge$condition[CD_2_merge$condition=="control"] <- "healthy"
CD_2_merge$condition[CD_2_merge$condition=="crohns"] <- "CD"
healthy_disease_2 <- rbind.fill(healthy_disease, before_trans, CD_2_merge)
#healthy_disease_2 <- rbind.fill(healthy_disease, before_trans)
# Sizes of each dataset
table(healthy_disease_2$condition)
##
## CD CDI CDI + CD CDI + UC healthy UC
## 359 23 5 4 953 40
ggplot(healthy_disease_2, aes(x = faith_pd)) +
geom_histogram(aes(y=..density..), colour="black", fill="white", bins=30) +
geom_density (alpha=.2, fill="#009E73") +
xlab(label = "faith_pd") +
ylab(label = "density") +
scale_x_continuous(trans = 'log10') +
facet_wrap(vars(condition), scales = "free_y")
Distribution of Faith’s PD in different groups shows modality in most of the IBD groups. *The values on y scale are densities instead of frequencies (what percentage of observations in a dataset fall between different values)
violin_all <- vector('list', length(metric))
for (i in 1:length(metric)){
mean_line <- healthy_disease_2 %>% dplyr::group_by(condition) %>% dplyr::summarise(grp_mean=mean(.data[[metric[i]]]))
plot_data <- healthy_disease_2 %>% dplyr::group_by(condition) %>% dplyr::mutate(m = mean(.data[[metric[i]]]))
violin_all[[i]] <- plot_data %>% ggplot(aes(x = .data[[metric[i]]], y = reorder(condition, -m), color = condition, fill = condition)) +
geom_violin()+
geom_boxplot(width=0.1, color="grey", alpha=0.2) +
geom_vline(data = mean_line, aes(xintercept = grp_mean, color = condition), linetype = "dashed")+
labs(x = metric[i])+
ylab("")+
ggtitle("A Violin plot of distributions of alpha metrics in different conditions")+
theme(legend.position="none")
if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index" & metric[i] != "menhinick"){
violin_all[[i]] <- violin_all[[i]] +
scale_x_continuous(trans = 'log10') +
xlab(paste(metric[i], "(log10)", sep = " "))
}
}
#plots for Shannon entropy
violin_all
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
Non-parametric test (does not assume normal distribution)
test <- list()
for (i in 1:length(metric)){
test[[i]] <- pairwise.wilcox.test(healthy_disease_2[[metric[i]]], healthy_disease_2$condition, p.adjust.method="none") %>%
broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
test[[i]]$p.value <- round(test[[i]]$p.value, digits = 17)
}
tests <- do.call(what = rbind, args = test)
table1 <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
arrange(p.value, parameter) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different conditions")
table2 <- tests %>%
add_column(p.adjusted = p.adjust(tests$p.value, "fdr"), .after='p.value') %>%
arrange(parameter, group1) %>%
flextable() %>%
bold(~ p.value < 0.05, 4) %>%
bold(~ p.adjusted < 0.05, 5) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different conditions")
#table1
#table2
# Do Wilcox test to see which parameters show significant differenc between healthy and unhealthy samples
wilcox_p_value <- healthy_disease_2 %>%
summarise(Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
Simpson = wilcox.test(simpson[condition == "healthy"], simpson[condition != "healthy"])$p.value,
Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value)
wilcox_p_value <- t(wilcox_p_value)
colnames(wilcox_p_value) <- c("p.value")
wilcox_p_value <- data.frame(Alpha_Metric = row.names(wilcox_p_value), wilcox_p_value)
wilcox_p_value$p.value <- round(wilcox_p_value$p.value, digits = 17)
wilcox_p_value %>%
flextable() %>%
bold(~ p.value < 0.05, 2) %>%
add_header_lines(values = "Results of the Wilcox test for distributions of different parameters between healthy and unhealthy samples")
Results of the Wilcox test for distributions of different parameters between healthy and unhealthy samples | |
Alpha_Metric | p.value |
Shannon | 0.00000000000000000 |
Chao1 | 0.00000000000000000 |
Menhinick | 0.00000000000000000 |
Margalef | 0.00000000000000000 |
Simpson | 0.00000000000000000 |
Fisher | 0.00000000000000000 |
Pielou | 0.00000000000000178 |
Gini | 0.00000000000000000 |
Strong | 0.00000000000000000 |
Faith | 0.00000000000011414 |
Based on previous analysis we can draw following conclusions:
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] rmarkdown_2.17 knitr_1.40 ggplotify_0.1.0 ComplexHeatmap_2.10.0 data.table_1.14.4
## [6] lubridate_1.8.0 forcats_0.5.2 readr_2.1.4 tidyr_1.2.1 tidyverse_1.3.2.9000
## [11] plotly_4.10.1 shiny_1.7.3 here_1.0.1 corrplot_0.92 PerformanceAnalytics_2.0.4
## [16] xts_0.12.1 zoo_1.8-9 nortest_1.0-4 flextable_0.8.2 cowplot_1.1.1
## [21] plyr_1.8.7 CGPfunctions_0.6.3 gridExtra_2.3 RColorBrewer_1.1-3 stringr_1.4.1
## [26] ggplot2_3.4.0 tibble_3.1.8 purrr_0.3.5 dplyr_1.0.10
##
## loaded via a namespace (and not attached):
## [1] circlize_0.4.15 readxl_1.4.1 uuid_1.1-0 backports_1.4.1 systemfonts_1.0.4 lazyeval_0.2.2
## [7] splines_4.1.2 TH.data_1.1-0 digest_0.6.30 yulab.utils_0.0.5 foreach_1.5.2 htmltools_0.5.3
## [13] rsconnect_0.8.25 fansi_1.0.3 memoise_2.0.1 magrittr_2.0.3 cluster_2.1.2 doParallel_1.0.17
## [19] paletteer_1.5.0 tzdb_0.3.0 modelr_0.1.9 matrixStats_0.61.0 officer_0.4.4 sandwich_3.0-1
## [25] colorspace_2.0-3 ggrepel_0.9.1 textshaping_0.3.6 xfun_0.37 crayon_1.5.2 jsonlite_1.8.3
## [31] libcoin_1.0-9 Exact_3.2 lme4_1.1-28 iterators_1.0.14 survival_3.2-13 glue_1.6.2
## [37] gtable_0.3.1 emmeans_1.8.2 MatrixModels_0.5-0 GetoptLong_1.0.5 sjstats_0.18.2 sjmisc_2.8.9
## [43] shape_1.4.6 BiocGenerics_0.40.0 scales_1.2.1 mvtnorm_1.1-3 DBI_1.1.3 Rcpp_1.0.8
## [49] viridisLite_0.4.1 xtable_1.8-4 performance_0.10.1 clue_0.3-62 gridGraphics_0.5-1 proxy_0.4-26
## [55] Formula_1.2-4 stats4_4.1.2 datawizard_0.6.4 htmlwidgets_1.5.4 httr_1.4.4 ellipsis_0.3.2
## [61] pkgconfig_2.0.3 farver_2.1.1 sass_0.4.2 utf8_1.2.2 tidyselect_1.2.0 labeling_0.4.2
## [67] rlang_1.0.6 later_1.3.0 cachem_1.0.6 munsell_0.5.0 cellranger_1.1.0 tools_4.1.2
## [73] cli_3.4.1 generics_0.1.3 sjlabelled_1.2.0 broom_1.0.1 evaluate_0.17 fastmap_1.1.0
## [79] ragg_1.2.1 yaml_2.3.6 rematch2_2.1.2 zip_2.2.0 ggmosaic_0.3.3 rootSolve_1.8.2.3
## [85] pbapply_1.6-0 nlme_3.1-155 mime_0.12 xml2_1.3.3 compiler_4.1.2 rstudioapi_0.14
## [91] png_0.1-7 e1071_1.7-9 bslib_0.4.1 DescTools_0.99.47 stringi_1.7.8 highr_0.9
## [97] gdtools_0.2.4 lattice_0.20-45 Matrix_1.4-0 nloptr_2.0.0 vctrs_0.5.0 pillar_1.8.1
## [103] lifecycle_1.0.3 GlobalOptions_0.1.2 jquerylib_0.1.4 estimability_1.4.1 insight_0.18.8 lmom_2.9
## [109] httpuv_1.6.5 R6_2.5.1 promises_1.2.0.1 IRanges_2.28.0 BayesFactor_0.9.12-4.4 gld_2.6.6
## [115] codetools_0.2-18 boot_1.3-28 MASS_7.3-55 assertthat_0.2.1 fontawesome_0.4.0 rjson_0.2.21
## [121] rprojroot_2.0.2 withr_2.5.0 S4Vectors_0.32.4 multcomp_1.4-18 bayestestR_0.13.0 expm_0.999-6
## [127] parallel_4.1.2 hms_1.1.2 quadprog_1.5-8 rpart_4.1.16 coda_0.19-4 class_7.3-20
## [133] minqa_1.2.4 inum_1.0-4 partykit_1.2-16 base64enc_0.1-3